# Directory structure
github_dir <- file.path("G:\\Shared drives\\Nord Lab - Computational Projects\\MIA\\eLIFE_Clean_code_for_GitHub")
setwd(github_dir)
# Global R markdown code chunk options
knitr::opts_chunk$set(message=FALSE,
warning = FALSE,
error=FALSE,
echo=TRUE,
cache=TRUE,
fig.width = 7, fig.height = 6,
fig.align = 'left')
# R packages
library(sva)
library(RUVnormalize)
library(pheatmap)
library(edgeR)
library(GenomicFeatures)
library(mclust)
library(parallel)
library(ggplot2)
library(reshape2)
library(dplyr)
library(data.table)
library(DT)
library(RColorBrewer)
library(knitr)
library(ggrepel)
library(gridExtra)
library(grid)
library(plyr)
library(plotly)
library(Hmisc)
# Reads input files
setwd("G:/Shared drives/Nord Lab - Computational Projects/MIA/eLIFE_Clean_code_for_GitHub/")
exp.data <- read.csv("./GEO_submission files/Gene_counts.csv")
rownames(exp.data) <- exp.data$X
exp.data$X <- NULL
sample.info <- read.csv("./GEO_submission files/metadata.csv")
load("./GEO_submission files/exonic.gene.sizes")
sample.info_submission <- read.csv("GEO_submission files/Supplementary Table 5, RNA-seq sample metadata_03_02_2021.csv")
datatable(sample.info_submission,
rownames = FALSE,
filter = list(position = 'top', clear = FALSE, plain = FALSE),
options = list(pageLength = 10, scrollX=T, scrollY=T))
A threshold of 1000 counts of Xist gene was used to identify sex
# Qualifies F or M based on the Xist expression
sex.by.rna <- c(ifelse(exp.data["Xist",]>1000,"F","M")) #
Xist_exp <- as.data.frame(reshape2::melt(exp.data["Xist",]))
Xist_exp <- cbind(Xist_exp, sex.by.rna)
Xist_exp <- arrange(Xist_exp, value)
colnames(Xist_exp) <- c("variable", "counts", "sex.by.rna")
ggplot(Xist_exp, aes(x=sex.by.rna, y=counts, colours=sex.by.rna))+
geom_jitter()+
geom_boxplot(alpha=0.2)+
theme_bw()+
labs(title="Xist read counts", x = "", y = "Read counts")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
scale_x_discrete(breaks= c("F", "M"), labels=c("Females", "Males"))
# Calculates RPKM values
# Gene lengths calculated with lapply
gene.lengths <- as.numeric(lapply(1:nrow(exp.data), function(x) FUN= as.numeric(exonic.gene.sizes[rownames(exp.data)[x]])))
#
rpkm.data <- rpkm(exp.data, gene.length=gene.lengths, log=T, prior.count=.25)
rpkm.data_linear <- rpkm(exp.data, gene.length=gene.lengths, log=F)
rpkm.data_linear_all <- rpkm.data_linear
#write.csv(rpkm.data, file = "GEO_submission #files/rpkm_log2_24015.csv")
#write.csv(rpkm.data_linear, file = "GEO_submission #files/rpkm_linear_24015.csv")
### Removes genes with low expression ###
# Lets plot a histogram of all rpkm values in a dataset.
#dim(rpkm.data) #24015 rows and 74 columns
df <- reshape2::melt(rpkm.data)
df$rpkm_linear <- 2^df$value # This converts the log2 RPKM values back to linear scale
colnames(df) <- c("gene_name", "sample", "rpkm_log2","rpkm_linear")
# Histogram of all rpkm values in a dataset.
ggplot(df, aes(x=rpkm_log2))+
geom_histogram(bins = 1000, color="black")+
theme_bw()+
labs(title="Unfiltered dataset of 24015 genes",
x="log2 RPKM", y = "Read counts")+
theme(plot.title = element_text(hjust = 0.5))
# Setting the threshold for minimum rpkm value in at least 2 samples.
threshold = -2
# Nymber of genes after filtering
#sum(reshape2::melt(rowSums(rpkm.data > threshold) >= 2)$value) # 17051
# Let's filter the dataset and plot the histogram distribution
keep <- as.data.frame(reshape2::melt(rowSums(rpkm.data > threshold) >= 2))
keep$gene_name <- rownames(keep)
keep <- filter(keep, value == "TRUE")$gene_name
# NOTE: The set of included genes reported in the manuscript was determined using expression criteria from an expanded set of samples including a condition that was not used for the manuscript. The difference between the gene counts is 17195 in the manuscript from the larger set of samples, compared to 17051 genes that would pass in the set of samples used in the MIA vs. control comparison. Note that there is no difference among DEG passing FDR < 0.05 using either gene set.
original_exp.data <- read.csv("GEO_submission files/Count_gene_set_from_initial_submission.csv")
# length(keep) # 17051
# length(original_exp.data$gene_name) # 17195
keep <- original_exp.data$gene_name
#
datExpr_test <- as.data.frame(rpkm.data)
datExpr_test$gene_name <- rownames(datExpr_test)
# Filters the "keep" genes
datExpr_test <- filter(datExpr_test, gene_name %in% keep)
# Generates a matrix-type data frame
rownames(datExpr_test) <- datExpr_test$gene_name
datExpr_test$gene_name <- NULL
# Overwrites the original datExpr object.
datExpr <- datExpr_test
# Plots the histogram after filtering
df <- reshape2::melt(datExpr)
df$rpkm_linear <- 2^df$value # This converts the log2 RPKM values back to linear scale
colnames(df) <- c("sample", "rpkm_log2","rpkm_linear")
ggplot(df, aes(x=rpkm_log2))+
geom_histogram(bins = 1000, color="black")+
theme_bw()+
labs(title = "Dataset of 17195 genes \n after excluding genes expressed at a very low level", x="log2 RPKM", y = "Read counts")+
theme(plot.title = element_text(hjust = 0.5))
# Removing low expression genes. Overwrites exp.data object used for DE analysis ###
#dim(exp.data) #exp.data contains counts, datExpr contains rpkms
exp.data$gene_name <- rownames(exp.data)
exp.data <- filter(exp.data, gene_name %in% rownames(datExpr))
rownames(exp.data) <- exp.data$gene_name
exp.data$gene_name <- NULL
# Recalculates rpkm values
# Gene lengths calculated with lapply
gene.lengths <- as.numeric(lapply(1:nrow(exp.data), function(x) FUN= as.numeric(exonic.gene.sizes[rownames(exp.data)[x]])))
# RPKM calculation
rpkm.data <- rpkm(exp.data, gene.length=gene.lengths, log=T, prior.count=.25)
rpkm.data_linear <- rpkm(exp.data, gene.length=gene.lengths, log=F)
#write.csv(rpkm.data, file = "GEO_submission #files/rpkm_log2_17051.csv")
#write.csv(rpkm.data_linear, file = "GEO_submission #files/rpkm_linear_17051.csv")
setwd(github_dir)
group <- ifelse(sample.info[,"Condition"]=="Saline",1,2)
rRNA <- as.numeric(sample.info$rRNA)
sex.by.rna <- factor(sample.info$sex.by.rna)
dpc <- factor(sample.info$DPC)
response <- factor(sample.info$Response)
lane <- factor(sample.info$Lane)
source("Dimensionality reduction plots.r")
grid.arrange(PCA_plot, PCA_plot_sex, ncol = 2)
# 19.5 was used as a numerical representation of the P0 time point.
test.dpc <- as.factor(sample.info$DPC)
test.sex.by.rna <- as.factor(sample.info$sex_by_rna)
test.response <- as.factor(sample.info$Response)
test.rRNA <- sample.info$rRNA
test.group <- as.factor(sample.info$Condition)
test.lane <- as.factor(sample.info$Lane)
test.data <- exp.data
min.cpm.criteria <- 0.1
min.cpm <- min.cpm.criteria
#qCML expression modeling
y <- DGEList(counts=test.data, group=test.group)
keep <- rowSums(cpm(y)>min.cpm) >=2
y <- y[keep, , keep.lib.sizes=FALSE]
## Perform simple exact test on group
y <- calcNormFactors(y)
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)
test.pseudo <- y$pseudo.counts
pca.results <- prcomp(test.pseudo)
#PCA dataframe does not contain all samples as the exp.data, so I have to find which datapoints are control and polyic now
control.datapoints <- which(sample.info$Condition == "Saline")
polyic.datapoints <- which(sample.info$Condition == "PolyIC")
train.data <- data.frame(PCA.1=pca.results$rotation[control.datapoints,1],
PCA.2=pca.results$rotation[control.datapoints,2],
PCA.3=pca.results$rotation[control.datapoints,3],
PCA.4=pca.results$rotation[control.datapoints,4],
PCA.5=pca.results$rotation[control.datapoints,5],
rRNA=as.numeric(test.rRNA[control.datapoints]),
sex=sex.by.rna[control.datapoints],
lane=test.lane[control.datapoints],
response=test.response[control.datapoints]
)
predict.data <- data.frame(PCA.1=pca.results$rotation[polyic.datapoints,1],
PCA.2=pca.results$rotation[polyic.datapoints,2],
PCA.3=pca.results$rotation[polyic.datapoints,3],
PCA.4=pca.results$rotation[polyic.datapoints,4],
PCA.5=pca.results$rotation[polyic.datapoints,5],
rRNA=as.numeric(test.rRNA[polyic.datapoints]),
sex=sex.by.rna[polyic.datapoints],
lane=test.lane[polyic.datapoints],
response=test.response[polyic.datapoints]
)
lm.model <- lm(as.numeric(as.character(test.dpc[control.datapoints])) ~
PCA.1 + PCA.2 + PCA.3 + PCA.4 + PCA.5 + rRNA + sex + response
, data = train.data
)
# Predicts DPC from the linear model built on train data.
predict.saline <- predict(lm.model, newdata = train.data)
predict.polyic <- predict(lm.model, newdata = predict.data)
# I had some trouble with the original for loop and I replaced it with lapply. I also added the sd values and a plot representing modeled DPC values.
dpc.predict.mean.saline <- lapply(1:4, function(x) FUN=mean(predict.saline[which(as.character(test.dpc[control.datapoints])==unique(test.dpc)[x])]))
dpc.predict.sd.saline <- lapply(1:4, function(x) FUN=sd(predict.saline[which(as.character(test.dpc[control.datapoints])==unique(test.dpc)[x])]))
dpc.predict.mean.polyic <- lapply(1:4, function(x) FUN=mean(predict.polyic[which(as.character(test.dpc[polyic.datapoints])==unique(test.dpc)[x])]))
dpc.predict.sd.polyic <- lapply(1:4, function(x) FUN=sd(predict.polyic[which(as.character(test.dpc[polyic.datapoints])==unique(test.dpc)[x])]))
dpc.predict.mean.saline <- as.numeric(dpc.predict.mean.saline)
dpc.predict.sd.saline <- as.numeric(dpc.predict.sd.saline)
dpc.predict.mean.polyic <- as.numeric(dpc.predict.mean.polyic)
dpc.predict.sd.polyic <- as.numeric(dpc.predict.sd.polyic)
lm.predicted.dpc <- data.frame(dpc.predict.mean.saline, dpc.predict.sd.saline, dpc.predict.mean.polyic, dpc.predict.sd.polyic)
####
df_1 <- data.frame("Actual_DPC" = as.numeric(as.character(test.dpc[control.datapoints])),
"Predicted_DPC" = predict.saline, "Condition" = rep("Saline", length(predict.saline)))
df_2 <- data.frame("Actual_DPC" = as.numeric(as.character(test.dpc[polyic.datapoints])),
"Predicted_DPC" = predict.polyic, "Condition" = rep("Poly(I:C)", length(predict.polyic)))
df_combined <- rbind(df_1, df_2)
mean_data <- group_by(df_combined, Actual_DPC, Condition) %>%
summarise(average_predicted_dpc = mean(Predicted_DPC, na.rm = TRUE),
sd_predicted_dpc = sd(Predicted_DPC, na.rm = TRUE)
)
mean_data <- as.data.frame(mean_data)
j_brew_colors <- brewer.pal(n = 8, name = "Paired")[c(6,2)]
df_combined$Condition <- ifelse(df_combined$Condition == "Saline", "Control", as.character(df_combined$Condition))
mean_data$Condition <- ifelse(mean_data$Condition == "Saline", "Control", as.character(mean_data$Condition))
Fig_4c <- ggplot()+
geom_jitter(data=df_combined, aes(x = Actual_DPC, y = Predicted_DPC, color=Condition), size = 2, height= 0.3, alpha=0.7)+
geom_line(data=mean_data, aes(x = Actual_DPC, y = average_predicted_dpc, color=Condition), size=1.5, alpha=0.7)+
theme_bw()+
scale_color_manual(values=c("Control"=j_brew_colors[2], "Poly(I:C)" = j_brew_colors[1]))+
scale_x_continuous(breaks= c(12.5, 14.5, 17.5, 19.5), labels=c("E12.5", "E14.5", "E17.5", "P0"))+
scale_y_continuous(breaks= c(12.5, 14.5, 17.5, 19.5), labels=c("E12.5", "E14.5", "E17.5", "P0"))+
labs(y = "Predicted DPC", x = "Actual DPC")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
Fig_4c
#Nicer dataframe
predicted.dpc.df <- data.frame("Real_DPC"=c(12.5, 14.5, 17.5, 19.5), "Predicted_Saline_Mean_DPC"= dpc.predict.mean.saline, "Predicted_Saline_SD_DPC"=dpc.predict.sd.saline, "Predicted_PolyIC_Mean_DPC"= dpc.predict.mean.polyic, "Predicted_PolyIC_SD_DPC"=dpc.predict.sd.polyic)
##### T test ####
t.test.12 <- t.test(predict.saline[which(group[control.datapoints]==1 & as.character(dpc[control.datapoints])=="12.5")], predict.polyic[which(group[polyic.datapoints]==2 & as.character(dpc[polyic.datapoints])=="12.5")])
t.test.14 <- t.test(predict.saline[which(group[control.datapoints]==1 & as.character(dpc[control.datapoints])=="14.5")], predict.polyic[which(group[polyic.datapoints]==2 & as.character(dpc[polyic.datapoints])=="14.5")])
t.test.17 <- t.test(predict.saline[which(group[control.datapoints]==1 & as.character(dpc[control.datapoints])=="17.5")], predict.polyic[which(group[polyic.datapoints]==2 & as.character(dpc[polyic.datapoints])=="17.5")])
t.test.19 <- t.test(predict.saline[which(group[control.datapoints]==1 & as.character(dpc[control.datapoints])=="19.5")], predict.polyic[which(group[polyic.datapoints]==2 & as.character(dpc[polyic.datapoints])=="19.5")])
t.test.modeled.DPC.p.val <- data.frame("p-value E12.5"=t.test.12$p.value, "p-value E14.5"=t.test.14$p.value, "p-value E17.5"=t.test.17$p.value, "p-value E19.5"=t.test.19$p.value)
predicted_DPC_df <- reshape2::melt(data.frame("mean E12.5"=t.test.12$estimate, "mean E14.5"=t.test.14$estimate, "mean E17.5"=t.test.17$estimate, "mean E19.5"=t.test.19$estimate))
t_test_p_value=c(t.test.12$p.value, t.test.14$p.value, t.test.17$p.value, t.test.19$p.value)
specify_decimal <- function(x, k) trimws(format(round(x, k), nsmall=k))
predicted_DPC_df_nice <- data.frame("Real_DPC"= c(12.5, 14.5, 17.5, 19.5), "Predicted_DPC_Saline"=predicted_DPC_df$value[c(1,3,5,7)], "Predicted_DPC_PolyIC"=predicted_DPC_df$value[c(2,4,6,8)], "t_test_p_value"=specify_decimal(t_test_p_value, 8))
knitr::kable(predicted_DPC_df_nice)
| Real_DPC | Predicted_DPC_Saline | Predicted_DPC_PolyIC | t_test_p_value |
|---|---|---|---|
| 12.5 | 12.95995 | 12.95008 | 0.98141714 |
| 14.5 | 14.63756 | 15.34472 | 0.12438056 |
| 17.5 | 16.68691 | 19.01691 | 0.00000121 |
| 19.5 | 19.48706 | 19.27017 | 0.40513411 |
single_timepoint_glm_function<- function(x){
control.datapoints <- intersect(control.datapoints, which(dpc == x))
polyic.datapoints <- intersect(polyic.datapoints, which(dpc == x))
use.cols <- c(control.datapoints, polyic.datapoints)
test.dpc <- dpc[use.cols]
test.sex.by.rna <- sex.by.rna[use.cols]
test.response <- response[use.cols]
test.rRNA <- as.numeric(rRNA)[use.cols]
test.group <- group[use.cols]
test.lane <- as.numeric(lane)[use.cols]
test.data <- exp.data[,use.cols]
min.cpm.criteria <- 0.1
min.cpm <- min.cpm.criteria
design <- model.matrix(~as.factor(test.lane) + as.factor(test.sex.by.rna) + as.numeric(test.group))
y <- DGEList(counts=test.data, group=group[use.cols])
keep <- rowSums(cpm(y)>min.cpm) >=2 #keeps only genes expressed in above min.cpm in at least 2 libraries in each group
y <- y[keep, , keep.lib.sizes=FALSE]
y <- estimateGLMCommonDisp(y,design)
y <- estimateGLMTrendedDisp(y,design)
y <- estimateGLMTagwiseDisp(y,design)
fit <- glmFit(y,design)
lrt <- glmLRT(fit) # Genewise Negative Binomial Generalized Linear Models.
glm.output <- topTags(lrt, n=Inf)
glm.output.full <- glm.output$table
glm.output.full$gene_name <- rownames(glm.output.full)
rownames(glm.output.full) <- NULL
glm.output.full <- glm.output.full[,c(6,1:5)]
glm.output.full
}
E12.5_GLM <- single_timepoint_glm_function(12.5)
E14.5_GLM <- single_timepoint_glm_function(14.5)
E17.5_GLM <- single_timepoint_glm_function(17.5)
E19.5_GLM <- single_timepoint_glm_function(19.5)
E12.5_up_n <- nrow(filter(E12.5_GLM, PValue < 0.05, logFC > 0))
E12.5_down_n <- nrow(filter(E12.5_GLM, PValue < 0.05, logFC < 0))
E14.5_up_n <- nrow(filter(E14.5_GLM, PValue < 0.05, logFC > 0))
E14.5_down_n <- nrow(filter(E14.5_GLM, PValue < 0.05, logFC < 0))
E17.5_up_n <- nrow(filter(E17.5_GLM, PValue < 0.05, logFC > 0))
E17.5_down_n <- nrow(filter(E17.5_GLM, PValue < 0.05, logFC < 0))
E19.5_up_n <- nrow(filter(E19.5_GLM, PValue < 0.05, logFC > 0))
E19.5_down_n <- nrow(filter(E19.5_GLM, PValue < 0.05, logFC < 0))
###
E12.5_up_n_FDR <- nrow(filter(E12.5_GLM, FDR < 0.05, logFC > 0))
E12.5_down_n_FDR <- nrow(filter(E12.5_GLM, FDR < 0.05, logFC < 0))
E14.5_up_n_FDR <- nrow(filter(E14.5_GLM, FDR < 0.05, logFC > 0))
E14.5_down_n_FDR <- nrow(filter(E14.5_GLM, FDR < 0.05, logFC < 0))
E17.5_up_n_FDR <- nrow(filter(E17.5_GLM, FDR < 0.05, logFC > 0))
E17.5_down_n_FDR <- nrow(filter(E17.5_GLM, FDR < 0.05, logFC < 0))
E19.5_up_n_FDR <- nrow(filter(E19.5_GLM, FDR < 0.05, logFC > 0))
E19.5_down_n_FDR <- nrow(filter(E19.5_GLM, FDR < 0.05, logFC < 0))
DE_df_n <- t(data.frame("E12.5" =c(E12.5_up_n, E12.5_down_n, E12.5_up_n_FDR, E12.5_down_n_FDR),
"E14.5" =c(E14.5_up_n, E14.5_down_n, E14.5_up_n_FDR, E14.5_down_n_FDR),
"E17.5" =c(E17.5_up_n, E17.5_down_n, E17.5_up_n_FDR, E17.5_down_n_FDR),
"P0" =c(E19.5_up_n, E19.5_down_n, E19.5_up_n_FDR, E19.5_down_n_FDR)))
colnames(DE_df_n) <- c("Upregulated", "Downregulated", "Upregulated", "Downregulated")
DE_df_n_melted <- melt(DE_df_n)
DE_df_n_melted$stat <- c(rep(", P < 0.05", 8), rep(", FDR < 0.05", 8))
DE_df_n_melted$Var2_stat <- paste(DE_df_n_melted$Var2, DE_df_n_melted$stat)
Fig1f <- ggplot(DE_df_n_melted, aes(fill=Var2_stat, group=Var2, x=Var1, y=value))+
geom_bar(position = "dodge", stat="identity", color="black")+
theme_bw()+
#scale_fill_manual(values=brewer.pal(n = 8, name = "Paired")[c(6,6,2,2 )])+
scale_fill_manual(values=c("#1F78B4", "#62a0ca", "#E31A1C", "#eb5e60"))+
theme(legend.title=element_blank())+
labs(title="Number of DE genes with P < 0.05 and FDR < 0.05", x="", y="")+
theme(plot.title = element_text(size = rel(1.2), hjust=0.5))+
geom_text(aes(label=value), position=position_dodge(width=0.9), hjust=-0.2)+
coord_flip()+
scale_x_discrete(limits = rev(levels(DE_df_n_melted$Var1)))+
theme(panel.border = element_blank(),
#legend.key = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.text.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(legend.position="bottom")
Fig1f
volcano_plot_text_2 <- function(x, title) {
#This is nicely dealing with datasets with missing cathegories.
test <- ifelse(x$PValue, "Non significant")
test <- ifelse(x$logFC > 0 & x$PValue <0.05, "PValue < 0.05 & logFC > 0", test)
test <- ifelse(x$logFC < 0 & x$PValue <0.05, "PValue < 0.05 & logFC < 0", test)
test <- ifelse(x$logFC > 0 & x$FDR <0.05, "FDR < 0.05 & logFC > 0", test)
test <- ifelse(x$logFC < 0 & x$FDR <0.05, "FDR < 0.05 & logFC < 0", test)
plotDat <- data.frame(x, Group=test)
plotDat$logFC <- ifelse(plotDat$logFC > 4, 4, plotDat$logFC)
plotDat$logFC <- ifelse(plotDat$logFC < -4, -4, plotDat$logFC)
p <- ggplot(plotDat, aes(x = logFC, y=-log10(PValue), fill=Group, col = Group)) +
geom_point(aes(text=gene_name), size=0.5, pch=21, alpha=0.7, stroke = 0.5)+
theme_light()+
scale_fill_manual(values=c("Non significant"="grey30",
"PValue < 0.05 & logFC > 0"="#eb5e60",
"PValue < 0.05 & logFC < 0"="#62a0ca",
"FDR < 0.05 & logFC > 0" = "#960304",
"FDR < 0.05 & logFC < 0" = "#01538a"))+
scale_color_manual(values=c("Non significant"="grey30",
"PValue < 0.05 & logFC > 0"="#eb5e60",
"PValue < 0.05 & logFC < 0"="#62a0ca",
"FDR < 0.05 & logFC > 0" = "#960304",
"FDR < 0.05 & logFC < 0" = "#01538a"))+
labs(title= title, y="", x="")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(legend.text=element_text(size=16))+
theme(axis.text=element_text(size=14))+
theme(legend.title=element_blank())+
theme(axis.text=element_text(size=14))+
theme(axis.title = element_text(size=14, face = "bold"))+
theme(legend.position="bottom")+
coord_cartesian(xlim = c(-4.2, 4.2))+
geom_hline(yintercept = -log10(0.05), linetype=2)+
scale_x_continuous(breaks=c(-4, -2, 0, 2, 4), labels=c("<-4","-2", "0", "2", ">4"))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p) %>%
layout(legend = list(
orientation = "v",
y = -0.1,
font=list(
size=14
)
)
)
}
################# Version capped at different y axis levels ###############
volcano_plot_text_3 <- function(x, title) {
#This is nicely dealing with datasets with missing cathegories.
test <- ifelse(x$PValue, "Non significant")
test <- ifelse(x$logFC > 0 & x$PValue <0.05, "PValue < 0.05 & logFC > 0", test)
test <- ifelse(x$logFC < 0 & x$PValue <0.05, "PValue < 0.05 & logFC < 0", test)
test <- ifelse(x$logFC > 0 & x$FDR <0.05, "FDR < 0.05 & logFC > 0", test)
test <- ifelse(x$logFC < 0 & x$FDR <0.05, "FDR < 0.05 & logFC < 0", test)
plotDat <- data.frame(x, Group=test)
plotDat$logFC <- ifelse(plotDat$logFC > 4, 4, plotDat$logFC)
plotDat$logFC <- ifelse(plotDat$logFC < -4, -4, plotDat$logFC)
plotDat$log10_PValue <- -log10(plotDat$PValue)
plotDat$log10_PValue <- ifelse(plotDat$log10_PValue > 8, 8, plotDat$log10_PValue)
#scale_fill_manual(values=c("#1F78B4" - blue, "#62a0ca" - lighter blue, "#E31A1C" - red, "#eb5e60" - lighter red))+
p <- ggplot(plotDat, aes(x = logFC, y=log10_PValue, fill=Group, col = Group)) +
geom_point(aes(text=gene_name), size=0.5, pch=21, alpha=0.7, stroke = 0.5)+
theme_light()+
scale_fill_manual(values=c("Non significant"="grey30",
"PValue < 0.05 & logFC > 0"="#eb5e60",
"PValue < 0.05 & logFC < 0"="#62a0ca",
"FDR < 0.05 & logFC > 0" = "#960304",
"FDR < 0.05 & logFC < 0" = "#01538a"))+
scale_color_manual(values=c("Non significant"="grey30",
"PValue < 0.05 & logFC > 0"="#eb5e60",
"PValue < 0.05 & logFC < 0"="#62a0ca",
"FDR < 0.05 & logFC > 0" = "#960304",
"FDR < 0.05 & logFC < 0" = "#01538a"))+
labs(title= title, y="", x="")+
theme(plot.title = element_text(size = rel(2), hjust=0.5))+
theme(legend.text=element_text(size=16))+
theme(axis.text=element_text(size=14))+
theme(legend.title=element_blank())+
theme(axis.text=element_text(size=14))+
theme(axis.title = element_text(size=14, face = "bold"))+
theme(legend.position="bottom")+
coord_cartesian(xlim = c(-4.2, 4.2))+
expand_limits(x = c(-4.2, 4.2))+
coord_cartesian(ylim = c(0, 8))+
geom_hline(yintercept = -log10(0.05), linetype=2)+
scale_x_continuous(breaks=c(-4, -2, 0, 2, 4), labels=c("<-4","-2", "0", "2", ">4"))+
scale_y_continuous(breaks=c(0, 2, 4, 6, 8), labels=c("0","2", "4", "6", ">8"))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggplotly(p) %>%
layout(legend = list(
orientation = "v",
y = -0.1,
font=list(
size=14
)
)
)
}
p <- volcano_plot_text_3(E12.5_GLM, "E12.5")
p
datatable(E12.5_GLM, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
p <- volcano_plot_text_3(E14.5_GLM, "E14.5")
p
datatable(E14.5_GLM, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))
p <- volcano_plot_text_2(E17.5_GLM, "E17.5")
p
datatable(E17.5_GLM, rownames = FALSE, filter="top", options = list(pageLength = 15, scrollX=T))